"""
Unzip the raw data files
"""
#%%
import pandas as pd
import zipfile

data_folder = 'data_py'

#%% IMPORT FILES INTO CSV FORMAT --------------------------------------------------------
# Base names (without the ".zip" suffix) of the archives to extract.
zipped_files = [
    'APDRawData', 'BHPSRawData', 'BoreholeRawData', 'eWellAPDRawData',
    'eWellAPMRawData', 'eWellEORRawData', 'eWellWARRawData', 'lsetapefixed',
]

# One production archive per year, 2000 through 2015 inclusive.
zipped_production = [f'ogora{year}delimit' for year in range(2000, 2016)]

# Extracted text file (path relative to the unzip folder) -> file name of the CSV
# written for it.
# NOTE(review): the borehole key spells "BoreholeRAWData" while the archive list
# spells "BoreholeRawData" — confirm the folder name inside the zip, since the
# difference matters on case-sensitive filesystems.
name_to_unzipped_path = {
    # Boreholes
    'BoreholeRAWData/mv_boreholes.txt': 'boreholes.csv',
    # Permits (non-eWell and eWell)
    'APDRawData/mv_apddata.txt': 'permits_non_ewell_main.csv',
    'eWellAPDRawData/mv_apd_main.txt': 'permits_ewell_main.csv',
    'eWellAPDRawData/mv_apd_casing_intervals.txt': 'permits_ewell_casing_intervals.csv',
    'eWellAPDRawData/mv_apd_casing_sectons.txt': 'permits_ewell_casing_sections.csv',
    # Bottom-hole pressure survey
    'BHPSRawData/mv_bhpsurvey.txt': 'bottomhole_pressure.csv',
    # eWell WAR tables
    'eWellWARRawData/mv_war_main.txt': 'war_main.csv',
    'eWellWARRawData/mv_war_main_prop.txt': 'war_main_prop.csv',
    'eWellWARRawData/mv_war_tubular_summaries.txt': 'war_tubular_summaries.csv',
    'eWellWARRawData/mv_war_tubular_summaries_prop.txt': 'war_tubular_summaries_prop.csv',
}


#%% DO UNZIP ----------------------------------------------------------------------------
# Every archive (well data plus yearly production) is extracted into the same
# folder, so the destination is computed once up front.
unzip_path = f"./{data_folder}/temp/01_unzip/raw_unzip/"

for archive_name in zipped_files + zipped_production:
    zip_path = f"./{data_folder}/raw/wells/{archive_name}.zip"
    with zipfile.ZipFile(zip_path, mode="r") as archive:
        archive.extractall(unzip_path)


#%% DO UNZIP -> CSV ---------------------------------------------------------------------
# Re-save each mapped text file as a CSV under its target file name.
for name, csv_name in name_to_unzipped_path.items():
    read_path = f"./{data_folder}/temp/01_unzip/raw_unzip/{name}"
    write_path = f'./{data_folder}/temp/01_unzip/raw_csv/{csv_name}'
    # encoding_errors='ignore' skips undecodable bytes instead of raising.
    # to_csv is called with defaults, so the row index is written as well.
    pd.read_csv(read_path, encoding_errors='ignore').to_csv(write_path)


#%% DO UNZIP -> CSV FOR PRODUCTION DATA WHICH IS IN A SLIGHTLY DIFFERENT FORMAT ---------
# The yearly production files are read with header=None, so the column names
# are supplied here, in file order.
production_columns = [
    'lease_number',
    'completion_name',
    'production_date',
    'days_on_production',
    'product_code',
    'monthly_oil_volume',
    'monthly_gas_volume',
    'monthly_water_volume',
    'api',
    'well_status_code',
    'area_code',
    'operator_num',
    'operator_name',
    'field_name_code',
    'injection_volume',
    'production_interval',
    'first_production_date',
    'unit_agreement_number',
    'unit_aloc_suffix',
]

for year in range(2000, 2016):
    production_txt = f"./{data_folder}/temp/01_unzip/raw_unzip/ogora{year}delimit.txt"
    production_csv = f"./{data_folder}/temp/01_unzip/raw_csv/ogora{year}delimit.csv"
    df = pd.read_csv(production_txt, header=None, names=production_columns)
    df.to_csv(production_csv)


#%% GET PRICE DEFLATOR (AND CLEAN UP) ---------------------------------------------------
# Build monthly and daily versions of the deflator series by up-sampling the raw
# GDPDEF.csv and interpolating between the observed values.
# The raw file is read and its dates parsed once; both frequencies derive from it.
# Output paths go through `data_folder`, like every other path in this script.
df_deflator = pd.read_csv(f'./{data_folder}/raw/GDPDEF.csv')
df_deflator.columns = df_deflator.columns.str.lower()
deflator_timestamps = pd.to_datetime(df_deflator['date'])

# Get monthly deflator
df_deflator['date'] = deflator_timestamps.dt.to_period('M')
# set_index returns a new frame, so re-assigning 'date' below does not affect
# the monthly result.
df_deflator_month = df_deflator.set_index('date').resample('M').interpolate().reset_index()
df_deflator_month.to_csv(f'./{data_folder}/processed/deflator_month.csv')

# Get daily deflator
df_deflator['date'] = deflator_timestamps.dt.to_period('D')
df_deflator_daily = df_deflator.set_index('date').resample('D').interpolate().reset_index()
df_deflator_daily.to_csv(f'./{data_folder}/processed/deflator_daily.csv')


#%% DO UNZIP -> CSV FOR LEASE DATA WHICH IS IN A SLIGHTLY DIFFERENT FORMAT --------------
# LSETAPE.DAT is fixed-width; only four fields are extracted, by character
# offsets (each pair is a half-open [start, end) interval).
column_locations = [(0, 8), (50, 58), (60, 68), (122, 136)]

# header=None with explicit names: without it read_fwf treats the first record
# as a header row, and that record is lost once the columns are renamed.
# NOTE(review): this assumes LSETAPE.DAT has no header line — confirm against
# the raw file.
df = pd.read_fwf(
    f'./{data_folder}/temp/01_unzip/raw_unzip/LSETAPE.DAT',
    colspecs=column_locations,
    header=None,
    names=['botm_lease_number', 'effective_date', 'expiry_date', 'bid'],
)
df['botm_lease_number'] = df['botm_lease_number'].str.lstrip()
df['effective_date'] = pd.to_datetime(df['effective_date'], format="%Y%m%d")
df['expiry_date'] = pd.to_datetime(df['expiry_date'], format="%Y%m%d")

# The daily deflator's 'date' column holds periods; convert it to timestamps so
# it can be matched against effective_date.
df_deflator_daily['date'] = pd.to_datetime(df_deflator_daily['date'].astype(str))

# Left merge keeps every lease; leases whose effective date has no deflator row
# end up with a missing gdpdef and therefore a missing bid.
df = df.merge(
    df_deflator_daily,
    left_on='effective_date',
    right_on='date',
    how='left'
)

# Rescale each bid by 100 / gdpdef for the lease's effective date.
df['bid'] = df['bid'] * (100 / df['gdpdef'])
df.to_csv(f'./{data_folder}/temp/01_unzip/raw_csv/lease.csv')
